********************************************************************************
** AEA Registrations          
** Graph + Summary Statistic Replication                                  
********************************************************************************

 ** NOTES: This do-file replicates a graph of AEA registrations over time and 
 ** a summary statistic on the percentage of recent registrations that include
 ** a pre-analysis plan. 
 **
 ** PUBLISHED: Evidence on Research Transparency in Economics (Forthcoming)
 ** Journal of Economic Perspectives, Miguel 2021
 **
 ** CODE WRITTEN BY:   Garrett Christensen, Edward Miguel, Sarah Stillman
 ** Date created: 15 May 2020

//Setup
// Start from an empty session so nothing left in memory affects the run
clear all
// Do not pause long output with -more- prompts
set more off
// Run the commands below under Stata 15.1 syntax rules
version 15.1

//Set your cd
// NOTE(review): this path is specific to one machine. Replace it with the folder
// that holds the csv; the input file and the graph files written later in this
// do-file are all addressed relative to this directory.
cd "C:\Users\sstillman\Dropbox (CEGA)\My PC (JW99JM2)\Documents\Transparency\JEP Paper"

//Read in Data from AEA-generated csv
// The "names" option takes variable names from the first row of the file.
// The file name carries the download date (2021.01.31); update it for a newer download.
insheet using AEAsearch2021.01.31.csv, names

** TO GET THE LATEST VERSION OF THE REGISTRATION DATA,
** JUST DO AN EMPTY SEARCH ON THE WEBSITE (https://www.socialscienceregistry.org/)
** AND CLICK TO DOWNLOAD THE CSV.
** NO SCRAPING REQUIRED!

********************************************************************************
* GRAPH - AEA REGISTRATIONS OVER TIME
********************************************************************************

//Build a date-only string from the publication timestamp
// Keep the first 10 characters of publishedat. Depending on how long the date
// part is, that window can still hold a space plus the first digit of the
// time-of-day, so strip any " <digit>" pair (digits 0 through 9, in order)
// and then any space that is left.
// NOTE(review): presumably publishedat looks like "M/D/YYYY H:MM" - confirm against the csv.
generate tempdate = substr(publishedat, 1, 10)
forvalues digit = 0/9 {
    replace tempdate = subinstr(tempdate, " `digit'", "", .)
}
replace tempdate = subinstr(tempdate, " ", "", .)

//Convert the cleaned string into a Stata daily date (month-day-year order)
generate regdate = date(tempdate, "MDY")

//Monthly date of registration
generate regmonth = mofd(regdate)
format %tm regmonth
label variable regmonth "Month and Year"

//Calendar month and calendar year as separate variables
generate month = month(regdate)
label variable month "Month"
generate yr = year(regdate)
label variable yr "Year"

//Quarterly date of registration
generate yq = qofd(regdate)
format %tq yq
label variable yq "Quarter and Year"

//Number of registrations in each quarter (repeated on every row of the quarter)
egen freq = count(yq), by(yq)
label variable freq "AEA Registrations in Given Quarter"

//Number the rows within each quarter: 0 if the quarter has a single row,
//otherwise 1, 2, ... so that one row per quarter can be singled out
sort yq
quietly by yq: generate dup = cond(_N == 1, 0, _n)

//Keep the quarterly count on one row per quarter only (dup of 0 or 1), so the
//running sum below adds each quarter exactly once
generate freq2 = freq if dup <= 1

//Running total of registrations; sum() treats the missing rows as zero, so
//every row of a quarter carries the cumulative total through that quarter
generate cum_reg_qtr = sum(freq2)
label variable cum_reg_qtr "Cumulative AEA Registrations"
sort cum_reg_qtr

//Create labels for the first and last quarters in the data
** The bounds must come from yq itself. regmonth is a monthly (%tm) date, so its
** minimum is on a different numeric scale and never equals a quarterly (%tq)
** value of yq; comparing the two leaves the label empty.
summarize yq
local first_q = r(min)
local last_q  = r(max)

** First quarter: label with that quarter's registration count (in the first
** quarter the count and the cumulative total are the same number)
gen mylabel2 = string(freq) if yq == `first_q'

** Last quarter: label with the cumulative total (used on the cumulative series)
gen mylabel3 = string(cum_reg_qtr) if yq == `last_q'

** Labels for the per-quarter line: counts for the first and the last quarter
gen mylabel4 = mylabel2
replace mylabel4 = string(freq) if mylabel3 != ""

//Create dual graph
** One twoway command overlaying three plots against the quarterly date yq:
**   1. bar       - cumulative registrations (cum_reg_qtr) on the second y axis,
**                  with custom x-axis tick labels drawn at a 45 degree angle
**   2. connected - new registrations per quarter (freq2) on the first y axis,
**                  y range 0 to 900, points labelled from mylabel4
**   3. scatter   - cum_reg_qtr again with no marker symbol and no line; it only
**                  carries the mylabel3 text labels on the second y axis
** The legend is switched off and the graph region is white.
** The tick labels stop at 2021q1 and the y range at 900: both are hard-coded
** and need extending for a newer download.
twoway (bar cum_reg_qtr yq, c(l) yaxis(2) yscale(alt) yscale(alt axis(2))fcolor(gs8) ///
lcolor(gs4) graphregion(color(white)) ytitle("Cumulative Registrations", axis(2)) ysc(titlegap(2)) ylabel(,nogrid) ///
tlabel(2013q2 "2013-q2" 2014q1 "2014-q1" 2015q1 "2015-q1" 2016q1 "2016-q1" 2017q1 "2017-q1" 2018q1 "2018-q1" 2019q1 "2019-q1" ///
 2020q1 "2020-q1" 2021q1 "2021-q1", angle(45))) ///
(connected freq2 yq, c(l) yaxis(1) xtitle(Year and Quarter) ytitle("New Registrations per Quarter") yscale(range(0 900)) ///
mlabel(mylabel4) mlabposition(11) mlabcolor(gs1) mc(gs3) msize(vsmall) lc(gs4) legend(off)) ///
(scatter cum_reg_qtr yq, c(l) yaxis(2) msymbol(none) mlabel(mylabel3) mlabposition(11) mlabc(gs6) lc(none))

//Update with date of download
** "replace" overwrites the files from an earlier run. Without it Stata stops
** with a "file already exists" error the second time the do-file is run.
graph save ./AEARegistrations_20210131.gph, replace
graph export ./AEARegistrations_20210131.png, replace

** Remove the helper variables that were only needed to build the graph
drop tempdate freq2 dup mylabel* regdate

********************************************************************************
* SUMMARY STAT - % REGISTRATIONS W/ PRE-ANALYSIS PLAN
********************************************************************************

//Shorter name for the analysis-plan column
rename analysisplandocuments pap

//Registration date: first 10 characters of firstregisteredon, parsed in
//month-day-year order
generate tempdate = substr(firstregisteredon, 1, 10)
generate regdate = date(tempdate, "MDY")
quietly tabulate regdate

//Calendar year of registration
generate study_year = year(regdate)

//Generate indicator for inclusion of pap: 0 when pap is "None", 1 otherwise
generate has_pap = (pap != "None")
tabulate has_pap

//Exclude completed studies
** End date: first 10 characters of enddate, parsed in month-day-year order
gen tempend=substr(enddate,1,10)
gen enddate2=date(tempend,"MDY")
quietly: tab enddate2

** Flag studies whose end date falls before the registration date.
** Stata treats a missing value as larger than any number, so without the
** !missing(regdate) guard a study with an unparsed registration date and a
** valid end date would satisfy enddate2<regdate and be flagged as completed.
** A missing end date already evaluates to false and leaves the flag at 0.
gen completed_study=0
replace completed_study=1 if (enddate2<regdate) & !missing(regdate)
tab completed_study

// PAP share by registration year, restricted to studies not flagged as completed
bysort study_year: tabulate has_pap if completed_study == 0

// Same restriction, pooled over registration years 2017 through 2021
tabulate has_pap if inrange(study_year, 2017, 2021) & completed_study == 0

// Label the two indicators that stay in the dataset
label variable has_pap "Has PAP"
label variable completed_study "Completed Study"

// Remove the helper variables used to build them
drop tempdate regdate tempend enddate2 study_year

